#!/bin/bash
#
# Convert every crawled Zhidao page under ./data/zhidao/page into an XML-like
# record under ./data/zhidao/xml/<page-name>.xml.
#
# Must be run from the project root: all paths are derived from the current
# working directory. Helper scripts are expected in ./src/python:
#   unquote.py  <page-name>        - its stdout is used as the "from" attribute
#   extract.py  <regex> <content>  - receives the whole page as ONE argument
#   trimtag.py  <text>             - receives extract.py's output as ONE argument
# (What the helpers do internally is not visible from this script.)
#
# Output is APPENDED, so re-running the script adds another <wenda> record to
# an already existing .xml file.
#
# Deliberately no `set -e`: a helper failing on one page must not stop the
# remaining pages from being processed.

#######################################
# Append one <wenda> record for a single page.
# Globals:   pydir, zhidaopagedir, zhidaoxmldir (read)
# Arguments: $1 - page file name (no directory part)
# Outputs:   appends to "$zhidaoxmldir/$1.xml"
#######################################
convert_page() {
  local name=$1
  local page=$zhidaopagedir/$name
  local out=$zhidaoxmldir/$name.xml
  local url now

  url=$("$pydir/unquote.py" "$name")
  now=$(date)

  # `xargs -0` splits only on NUL bytes, so (assuming the page contains none)
  # the entire page content is handed to the helper as a single argument.
  {
    printf '<wenda from="%s" timestamp="%s">\n' "$url" "$now"

    # <cq> section; newlines are stripped before tag trimming.
    xargs -0 "$pydir/extract.py" '<cq>(?P<question>.*?)</cq>' <"$page" \
      | tr -d '\n' \
      | xargs -0 "$pydir/trimtag.py"
    printf '\n'

    # <cd> section; same treatment as <cq>.
    xargs -0 "$pydir/extract.py" '<cd>(?P<question>.*?)</cd>' <"$page" \
      | tr -d '\n' \
      | xargs -0 "$pydir/trimtag.py"
    printf '\n'

    # <ca> section; newlines from extract.py are kept here.
    xargs -0 "$pydir/extract.py" '<ca>(?P<answer>.*?)</ca>' <"$page" \
      | xargs -0 "$pydir/trimtag.py"

    # Closing tag is written without a trailing newline.
    printf '</wenda>'
  } >>"$out"
}

#######################################
# Set up the directory layout and convert every page file.
# Globals:   rootdir, pydir, dataroot, zhidaoroot, zhidaoxmldir,
#            zhidaopagedir (written)
# Arguments: none
#######################################
main() {
  # $PWD instead of `pwd | tr -d '\n'`: no path mangling, no extra processes.
  rootdir=$PWD

  pydir=$rootdir/src/python
  dataroot=$rootdir/data
  zhidaoroot=$dataroot/zhidao
  zhidaoxmldir=$zhidaoroot/xml
  zhidaopagedir=$zhidaoroot/page

  # -p also creates $dataroot and $zhidaoroot.
  mkdir -p -- "$zhidaoxmldir"

  # Glob instead of parsing `ls`, so names with spaces or glob characters
  # survive intact. Like `ls`, the glob skips dot-files.
  local page
  for page in "$zhidaopagedir"/*; do
    # Skips sub-directories and the literal pattern left by an empty/missing dir.
    [[ -f "$page" ]] || continue
    convert_page "${page##*/}"
  done
}

main "$@"